{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.017323Z",
     "start_time": "2020-03-21T02:06:27.405178Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import warnings\n",
    "import os\n",
    "from tqdm import tqdm\n",
    "from sklearn import preprocessing, metrics\n",
    "import lightgbm as lgb\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from joblib import Parallel, delayed\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', None)\n",
    "\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.027158Z",
     "start_time": "2020-03-21T02:06:28.018988Z"
    }
   },
   "outputs": [],
   "source": [
    "next_action = pd.read_csv('./temp/next_action.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.037318Z",
     "start_time": "2020-03-21T02:06:28.028192Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>98263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>116276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>153284</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>23009</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id\n",
       "0   98263\n",
       "1  116276\n",
       "2  153284\n",
       "3   16134\n",
       "4   23009"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next_action.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.442488Z",
     "start_time": "2020-03-21T02:06:28.039872Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(223104, 12)\n",
      "(48277, 12)\n"
     ]
    }
   ],
   "source": [
    "df_feature = pd.read_pickle('./temp/base_feature.plk')\n",
    "print(df_feature.shape)\n",
    "df_feature = next_action.merge(df_feature, how='left')\n",
    "print(df_feature.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.451429Z",
     "start_time": "2020-03-21T02:06:28.443697Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "train    44058\n",
       "test      4219\n",
       "Name: type, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_feature['type'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.464082Z",
     "start_time": "2020-03-21T02:06:28.452870Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>courier_id</th>\n",
       "      <th>wave_index</th>\n",
       "      <th>tracking_id</th>\n",
       "      <th>courier_wave_start_lng</th>\n",
       "      <th>courier_wave_start_lat</th>\n",
       "      <th>action_type</th>\n",
       "      <th>expect_time</th>\n",
       "      <th>date</th>\n",
       "      <th>type</th>\n",
       "      <th>target</th>\n",
       "      <th>group</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>98263</td>\n",
       "      <td>10330725</td>\n",
       "      <td>9</td>\n",
       "      <td>2100075923187730175</td>\n",
       "      <td>121.481429</td>\n",
       "      <td>39.299365</td>\n",
       "      <td>PICKUP</td>\n",
       "      <td>1582888651</td>\n",
       "      <td>20200228</td>\n",
       "      <td>train</td>\n",
       "      <td>1.0</td>\n",
       "      <td>20200228103307259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>116276</td>\n",
       "      <td>10053442</td>\n",
       "      <td>1</td>\n",
       "      <td>2100075314534647435</td>\n",
       "      <td>121.479587</td>\n",
       "      <td>39.248115</td>\n",
       "      <td>PICKUP</td>\n",
       "      <td>1582084880</td>\n",
       "      <td>20200219</td>\n",
       "      <td>train</td>\n",
       "      <td>1.0</td>\n",
       "      <td>20200219100534421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>153284</td>\n",
       "      <td>118787313</td>\n",
       "      <td>4</td>\n",
       "      <td>2100075078536791439</td>\n",
       "      <td>121.440498</td>\n",
       "      <td>39.203471</td>\n",
       "      <td>DELIVERY</td>\n",
       "      <td>1581671584</td>\n",
       "      <td>20200214</td>\n",
       "      <td>train</td>\n",
       "      <td>1.0</td>\n",
       "      <td>202002141187873134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16134</td>\n",
       "      <td>116706233</td>\n",
       "      <td>3</td>\n",
       "      <td>2100074825841346124</td>\n",
       "      <td>121.543010</td>\n",
       "      <td>39.258822</td>\n",
       "      <td>DELIVERY</td>\n",
       "      <td>1581075896</td>\n",
       "      <td>20200207</td>\n",
       "      <td>train</td>\n",
       "      <td>1.0</td>\n",
       "      <td>202002071167062333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>23009</td>\n",
       "      <td>118333873</td>\n",
       "      <td>5</td>\n",
       "      <td>2100074746653279446</td>\n",
       "      <td>121.406669</td>\n",
       "      <td>39.364738</td>\n",
       "      <td>PICKUP</td>\n",
       "      <td>1580906387</td>\n",
       "      <td>20200205</td>\n",
       "      <td>train</td>\n",
       "      <td>1.0</td>\n",
       "      <td>202002051183338735</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id  courier_id  wave_index          tracking_id  \\\n",
       "0   98263    10330725           9  2100075923187730175   \n",
       "1  116276    10053442           1  2100075314534647435   \n",
       "2  153284   118787313           4  2100075078536791439   \n",
       "3   16134   116706233           3  2100074825841346124   \n",
       "4   23009   118333873           5  2100074746653279446   \n",
       "\n",
       "   courier_wave_start_lng  courier_wave_start_lat action_type  expect_time  \\\n",
       "0              121.481429               39.299365      PICKUP   1582888651   \n",
       "1              121.479587               39.248115      PICKUP   1582084880   \n",
       "2              121.440498               39.203471    DELIVERY   1581671584   \n",
       "3              121.543010               39.258822    DELIVERY   1581075896   \n",
       "4              121.406669               39.364738      PICKUP   1580906387   \n",
       "\n",
       "       date   type  target               group  \n",
       "0  20200228  train     1.0   20200228103307259  \n",
       "1  20200219  train     1.0   20200219100534421  \n",
       "2  20200214  train     1.0  202002141187873134  \n",
       "3  20200207  train     1.0  202002071167062333  \n",
       "4  20200205  train     1.0  202002051183338735  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_feature.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-21T02:06:28.560850Z",
     "start_time": "2020-03-21T02:06:28.465154Z"
    }
   },
   "outputs": [],
   "source": [
    "df_feature.to_pickle('./temp/part2_feature.plk')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:dm] *",
   "language": "python",
   "name": "conda-env-dm-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
